# Dickstein, Ho, and Mark (2023)
# This script includes functions that are used to generate
# counterfactuals and to report counterfactual comparisons
# Adjusted for counterfactuals with different alphas.

# * # * # * # * # * # * # * # * #
# Run Counterfactual Functions #
# * # * # * # * # * # * # * # * #

RunCounterfactual_alpha <- function(
  ExplDat,
  PlanDat,
  PremVec.Init = NULL, # Initial Value of Premium vector. (for each constructed plan ID)
  behaviorShare = .01, # percent of the market that we add who are behavioral types.
  maxR = 100, #Maximum number of iterations
  tol = 10^(-3), # tolerance
  costwinsorization = 89.37, # expected cost over which we assume it this number # check max cost in exploded data
  pricelimit = 1000, # limit I put on prices.
  subsidytypeSG = "Ratio", # Other options are "Fixed" or "CSCAdjust"
  subsidytypeInd = "Ratio", # Other options are "Fixed" or "CSCAdjust"
  # Ratio: subs is fixed for each possible option at the start. 
  # Fixed: subs is a fixed $ taken off. (for SG, keeping it tax exempt)
  # CSCAdjust: subs changes given the second cheapest silver plan.
  fixed_markup = 0
  ){ 
  
  # Pre-algorithm cleaning
  ## Ensure that all plans exist in exploded data and all options exist in plan data
  nr <- nrow(ExplDat)
  nhh <- length(unique(ExplDat$hhid))
  ExplDat <- ExplDat[(plan_id %in% unique(PlanDat$plan_id))]
  ExplDat[, n := .N, by = c("hhid", "year")]
  ExplDat <- ExplDat[n > 1]
  ExplDat$n <- NULL
  print(paste0(nr - nrow(ExplDat), " observations removed due to choices not in ind. market choice set."))
  print(paste0(nhh - length(unique(ExplDat$hhid)), " households removed due to choices not in ind. market choice set."))
  nr <- nrow(PlanDat)
  PlanDat <- PlanDat[(plan_id %in% unique(ExplDat$plan_id) | plan_id == "Outside_Option")]
  print(paste0(nr - nrow(PlanDat), " plans removed due to never existing in choice set."))
  
  ## Generate PremVec.Init if it is not given
  if(is.null(PremVec.Init)){
    PremVec.Init = ifelse(PlanDat$plan_id == "Outside_Option", 0, 1)
  }
  
  # Preliminaries before while loop starts
  ## Plug in initial price vector
  PlanDat$price <- PremVec.Init
  avepercentdiff <- 1
  r <- 0
  
  while(r < maxR & avepercentdiff > tol){
    r <- r + 1
    print(paste0("Starting step", r))
    
    # Step Zero: Merge price vector into exploded dataset and create individual prices:
    ExplDat <- merge(
      ExplDat,
      PlanDat[, .(plan_id, price)],
      by = "plan_id",
      all.x = T)
                     
    AnyIndMkt <- sum(ExplDat$hhid > 0)  
    AnySGMkt <- sum(ExplDat$hhid < 0)
    
    ExplDat_List <- list()
    if(AnyIndMkt) ExplDat_List$IM <- ExplDat[hhid > 0]
    if(AnySGMkt) ExplDat_List$SG <- ExplDat[hhid < 0]
    
    subsidytype_vec <- c(subsidytypeInd, subsidytypeSG)
 
    for(i in seq_along(ExplDat_List)){              
      if(subsidytype_vec[i] == "Ratio"){
        ExplDat_List[[i]][, price_hh := ratingfactor * (1 - subs) * price, ]
      } else if(subsidytype_vec[i] == "Fixed"){
        ExplDat_List[[i]][, price_hh := ifelse(hhid < 0, 
          pmax(ratingfactor * (1 - (best_guess_sumrate / 100)) * price - subs, 0), 
          pmax(ratingfactor * price - subs, 0)), ] 
        # Note: subs in this case is (1-t)*v * base rate before
      } else if(subsidytype_vec[i] == "SCSAdjust"){
        if(r == 1){    
          if(sum(ExplDat_List[[i]]$scs) != nrow(unique(ExplDat_List[[i]][, .(hhid, year), ]))){
            stop("Each household needs exactly one second cheapest silver plan")
          }
        }  
        ExplDat_List[[i]][, price_scsr := sum(scs * ratingfactor * price), by = c("hhid", "year")]     
        ExplDat_List[[i]][, subs := pmax(subs_scs0 + (price_scsr - price_scs0), 0), ] 
        ExplDat_List[[i]][, price_scsr := NULL, ]
        ExplDat_List[[i]][, price_hh := ifelse(hhid < 0, 
          pmax(ratingfactor * (1 - (best_guess_sumrate / 100)) * price - subs, 0), 
          pmax(ratingfactor * price - subs, 0)), ] 
      } else {
        stop("subsidytype must be one of: Ratio, Fixed, SCSAdjust")
      }
    }
    ExplDat <- do.call("rbind", ExplDat_List)
    
    # Step One: Compute household expected utility for each plan:
    ExplDat[, V := x_av + .5 * omega * (x_av ^ 2) - (alpha - psi) * price_hh + fixedeffect_beta0, ]
    
    # Step Two: Compute pridicted plan shares (within each household)
    ExplDat[, sumexpV := sum(exp(V)), by = c("hhid", "year")]
    ExplDat[, share := exp(V) / sumexpV, ]
    
    # Step Three: Compute expected costs for household i under plan j:
    ExplDat[, costbar := pmin((x_av + omega * (x_av ^ 2)) / alpha, costwinsorization)]
    ## Note: this could be done outside of the while loop.  It is the in each loop.
    
    # Step Four: Compute total cost to the insurer of the plan and the new price
    CostPreAdjustment <- ExplDat[plan_id != "Outside_Option", .(
      cost = sum(share * kappa * costbar),
      ratingfactorsharesum = sum(share * ratingfactor)
    ), by = "plan_id"]
    
    PlanDat <- merge(PlanDat,
                     CostPreAdjustment,
                     by = "plan_id",
                     all.x = T
    )
    
    PlanDat[, pricenew := (1 + fixed_markup) * (beta_4 * cost + fe_beta_5) / ratingfactorsharesum * (1 + behaviorShare), ]
    if(sum(is.na(PlanDat$pricenew)) != 1){ stop("Missing new price")}
    PlanDat[, pricenew := ifelse(is.na(pricenew), 0, pricenew), ]
    
    # Step Five: Test for convergence
    avepercentdiff <- mean(abs((PlanDat[pricenew != 0]$pricenew - PlanDat[pricenew != 0]$price) / PlanDat[pricenew != 0]$price))
    print(paste0("Average Percent Change is ", avepercentdiff))
    
    # Step Six: Redefine price and clean
    PlanDat <- PlanDat[, .(
      plan_id,
      beta_4,
      fe_beta_5,
      price = pmin(pricenew, pricelimit)
    )]
    
    # Cleaning
    if(r < maxR & avepercentdiff > tol){
      vars_to_keep <- c("orig_subs_id", "year", "hhid", "plan_id", "best_guess_AV_old", 
      "best_guess_netprem_old", "best_guess_sumrate", "grossprem_old", "ratingfactor", 
      "subs", "x_av", "alpha", "omega", "psi", "fixedeffect_beta0", "kappa", "sum_concurrent_risk", 
      "in_orig_cs1","in_orig_cs2")
      # "old" variables are only for table calculation
      if(any(subsidytype_vec %in% c("SCSAdjust"))){
        vars_to_keep <- c(vars_to_keep, "scs", "price_scs0", "subs_scs0") 
      }
      
      ExplDat <- ExplDat[, vars_to_keep , with = F]
    }
  }
  
  return(CounterfactualList = list(
    ExplodedData = ExplDat,
    PlanData = PlanData
  ))
}

# * # * # * # * # * # * # * # * #
#       Table Generation        #
# * # * # * # * # * # * # * # * #

generateOutcomeColumns_alpha <- function(
  CounterFactualResults,
  costwinsorization = 89.37,
  SGpremsubs = F, 
  subsidytypeSG = "Ratio",
  subsidytypeInd = "Ratio"
){

  print("Creating HH level results.")
  posttab <- CounterFactualResults$ExplodedData[, .(
    acg = mode1(sum_concurrent_risk),
    CS = mode1(1 / (alpha - psi)) * log(sum(exp(V))),
    p_1 = ifelse(sum(in_orig_cs1) >= 1, 1, 0),
    p_2 = ifelse(sum(in_orig_cs2) >= 1, 1, 0),
    PUninsured = sum(share * as.numeric(x_av == 0)),
    PBronze = sum(share * as.numeric(x_av == 0.6)),
    PSilver = sum(share * as.numeric(x_av %in% c(0.7, 0.73, 0.87, 0.94))),
    PGold = sum(share * as.numeric(x_av == 0.8)),
    AveBronzePremNet = sum(as.numeric(x_av == 0.6) * ifelse(is.na(price_hh), 0, price_hh) * share) / sum(as.numeric(x_av == 0.6) * share), 
    AveSilverPremNet = sum(as.numeric(x_av %in% c(0.7, 0.73, 0.87, 0.94)) * ifelse(is.na(price_hh), 0, price_hh) * share) / sum(as.numeric(x_av %in% c(0.7, 0.73, 0.87, 0.94)) * share),
    AveGoldPremNet = sum(as.numeric(x_av == 0.8) * ifelse(is.na(price_hh), 0, price_hh) * share) / sum(as.numeric(x_av == 0.8) * share), 
    AveBronzeSubsidy = sum(as.numeric(x_av == 0.6) * (ratingfactor * price - ifelse(is.na(price_hh), 0, price_hh)) * share) / sum(as.numeric(x_av == 0.6) * share), 
    AveSilverSubsidy = sum(as.numeric(x_av %in% c(0.7, 0.73, 0.87, 0.94)) * (ratingfactor * price - ifelse(is.na(price_hh), 0, price_hh)) * share) / sum(as.numeric(x_av %in% c(0.7, 0.73, 0.87, 0.94)) * share), 
    AveGoldSubsidy = sum(as.numeric(x_av == 0.8) * (ratingfactor * price - ifelse(is.na(price_hh), 0, price_hh)) * share) / sum(as.numeric(x_av == 0.8) * share), 
    AveBronzeBaseGross = sum(as.numeric(x_av == 0.6) * ifelse(is.na(price), 0, price) * share) / sum(as.numeric(x_av == 0.6) * share), 
    AveSilverBaseGross = sum(as.numeric(x_av %in% c(0.7, 0.73, 0.87, 0.94)) * ifelse(is.na(price), 0, price) * share) / sum(as.numeric(x_av %in% c(0.7, 0.73, 0.87, 0.94)) * share), 
    AveGoldBaseGross = sum(as.numeric(x_av == 0.8) * ifelse(is.na(price), 0, price) * share) / sum(as.numeric(x_av == 0.8) * share), 
    GovmntSpend = ifelse(SGpremsubs == T | hhid > 0, 
      ifelse(subsidytypeInd == "Ratio",
        sum(share * subs * ratingfactor * price), 
        sum(share * ifelse(subs < ratingfactor * price, subs, ratingfactor * price))), 
      sum(share * (best_guess_sumrate / 100) * ratingfactor * price)), 
    EmployerSpend = ifelse(SGpremsubs == T | hhid > 0, 
      0, 
      ifelse(subsidytypeSG == "Ratio", 
        sum(share * (share / ( 1 - share)) * price_hh), 
        sum(share * ifelse(subs < (1 - (best_guess_sumrate / 100)) * ratingfactor * price, subs, (1 - (best_guess_sumrate / 100)) * ratingfactor * price)))),
    InPreSample = mode1(!is.na(grossprem_old))
  ),
  by = c("hhid", "year")]
  
  posttab[, Market := ifelse(hhid < 0, "SmallGroup", "Individual")]
  posttab[, CS_1 := ifelse(p_1 == 1, CS, NA), ]
  posttab[, CS_2 := ifelse(p_2 == 1, CS, NA), ]
  posttab[, CS_2_highacg := ifelse(p_2 == 1 & acg > acgHL_boundary, CS, NA), ]
  posttab[, CS_2_lowacg := ifelse(p_2 == 1 & acg <= acgHL_boundary, CS, NA), ]
  
  print(paste0("Ind Mkt N: ", nrow(posttab[Market == "Individual"])))
  print(paste0("SG Mkt N: ", nrow(posttab[Market == "SmallGroup"])))
  print(paste0("Ind Mkt N HHs used for CS calculation HERE IMPORTANT: ", nrow(posttab[Market == "Individual" & p_2 == 1])))
  print(paste0("SG Mkt N HHs used for CS calculation HERE IMPORTANT: ", nrow(posttab[Market == "SmallGroup" & p_2 == 1])))
  
  print("Creating HH level results.")
  Results_List <- list()
  
  # Set variables to use
  welfare_vars_list <- list(ind_mkt = c("CS_1", "CS_2", "GovmntSpend", "CS_2_lowacg", "CS_2_highacg"), 
    sg_mkt = c("CS_1", "CS_2", "GovmntSpend", "EmployerSpend", "CS_2_lowacg", "CS_2_highacg"))
  mktshr_vars <- c("PUninsured", "PBronze", "PSilver", "PGold")
  grossprem_vars <- c("AveBronzeBaseGross", "AveSilverBaseGross", "AveGoldBaseGross")
  netprem_vars <- c("AveBronzePremNet", "AveSilverPremNet", "AveGoldPremNet") 
  subsidy_vars <- c("AveBronzeSubsidy", "AveSilverSubsidy", "AveGoldSubsidy")
  
  markets <- list("Individual", "SmallGroup", c("Individual", "SmallGroup"))
  for(i in 1:2){
    Results_List[[i]] <- list()
    Results_List[[i]]$welfare <- unlist(lapply(posttab[Market %in% markets[[i]], welfare_vars_list[[i]], with = F], mean, na.rm = T))
    Results_List[[i]]$mktshr <- unlist(lapply(posttab[Market %in% markets[[i]], mktshr_vars, with = F], mean, na.rm = T))
    Results_List[[i]]$grossprem <- with(posttab[Market %in% markets[[i]]], 
      c(weighted.mean(AveBronzeBaseGross, PBronze, na.rm = T), 
      weighted.mean(AveSilverBaseGross, PSilver, na.rm = T),
      weighted.mean(AveGoldBaseGross, PGold, na.rm = T)))
    Results_List[[i]]$netprem <- with(posttab[Market %in% markets[[i]]], 
      c(weighted.mean(AveBronzePremNet, PBronze, na.rm = T), 
      weighted.mean(AveSilverPremNet, PSilver, na.rm = T),
      weighted.mean(AveGoldPremNet, PGold, na.rm = T)))
    Results_List[[i]]$subsidy <- with(posttab[Market %in% markets[[i]]], 
      c(weighted.mean(AveBronzeSubsidy, PBronze, na.rm = T), 
      weighted.mean(AveSilverSubsidy, PSilver, na.rm = T),
      weighted.mean(AveGoldSubsidy, PGold, na.rm = T)))
    }

  return(Results_List)
}


generateSGOutcomeColumnsPre_alpha <- function(
  CounterFactualResults,
  costwinsorization = 89.37,
  SGpremsubs = F, 
  subsidytypeSG = "Ratio", 
  subsidytypeInd = "Ratio"
){

  print("Creating HH level results.")
  HHlevelDat <- CounterFactualResults$ExplodedData[, .(
    p_1 = ifelse(sum(in_orig_cs1) >= 1, 1, 0),
    p_2 = ifelse(sum(in_orig_cs2) >= 1, 1, 0),
    acg = mode1(sum_concurrent_risk),
    psi = mode1(psi),
    alpha = mode1(alpha),
    omega = mode1(omega),
    subs = mode1(subs),
    best_guess_AV_old = mode1(best_guess_AV_old) / 100,
    best_guess_sumrate = mode1(best_guess_sumrate) / 100,
    best_guess_netprem_old = mode1(best_guess_netprem_old) / 100,
    grossprem_old = mode1(grossprem_old) / 100,
    ratingfactor = mode1(ratingfactor),
    InPreSample = mode1(!is.na(grossprem_old))),
    by = c("hhid", "year")]
  HHlevelDat[, Market := ifelse(hhid < 0, "SmallGroup", "Individual")]
  
  foo <- HHlevelDat[Market == "SmallGroup" & InPreSample == T]
  print(paste0("SG Base N:", nrow(foo)))
  print("Creating HH level results.")
  Results_List <- list()
  
  # Set variables to use
  welfare_vars_list <- list(ind_mkt = c("CS_1", "CS_2", "GovmntSpend", "CS_2_lowacg", "CS_2_highacg"), 
    sg_mkt = c("CS_1", "CS_2", "GovmntSpend", "EmployerSpend", "CS_2_lowacg", "CS_2_highacg"))
  mktshr_vars <- c("PUninsured", "PBronze", "PSilver", "PGold")
  grossprem_vars <- c("AveBronzeBaseGross", "AveSilverBaseGross", "AveGoldBaseGross")
  netprem_vars <- c("AveBronzePremNet", "AveSilverPremNet", "AveGoldPremNet") 
  subsidy_vars <- c("AveBronzeSubsidy", "AveSilverSubsidy", "AveGoldSubsidy")
    
  Results <- list()

  Results$welfare <- with(foo, 
    c(
      NA, 
      NA, 
      mean(best_guess_sumrate * grossprem_old, na.rm = T), 
      mean(nu * (1 - (best_guess_sumrate)) * grossprem_old, na.rm = T), 
      NA, 
      NA))

  Results$mktshr <- with(foo, 
    c(0.000, 
    sum(best_guess_AV_old == 0.6, na.rm = T) / sum(!is.na(best_guess_AV_old)),
    sum(best_guess_AV_old %in% c(0.7, 0.73, 0.87), na.rm = T) / sum(!is.na(best_guess_AV_old)),
    sum(best_guess_AV_old == 0.8, na.rm = T) / sum(!is.na(best_guess_AV_old))))

  Results$grossprem <- with(foo, 
    c(sum(grossprem_old / ratingfactor * as.numeric(best_guess_AV_old == 0.6), na.rm = T) / sum (best_guess_AV_old == 0.6, na.rm = T),
    sum(grossprem_old / ratingfactor * as.numeric(best_guess_AV_old %in% c(0.7, 0.73, 0.87)), na.rm = T) / sum(best_guess_AV_old %in% c(0.7, 0.73, 0.87), na.rm = T),
    sum(grossprem_old / ratingfactor * as.numeric(best_guess_AV_old == 0.8), na.rm = T) / sum(best_guess_AV_old == 0.8, na.rm = T)))

  Results$netprem <- with(foo, 
    c(sum((grossprem_old * (1 - nu) * (1 - best_guess_sumrate)) * as.numeric(best_guess_AV_old == 0.6), na.rm = T) / sum(best_guess_AV_old == 0.6, na.rm = T),
    sum((grossprem_old * (1 - nu) * (1 - best_guess_sumrate)) * as.numeric(best_guess_AV_old %in% c(0.7, 0.73, 0.87)), na.rm = T) / sum(best_guess_AV_old %in% c(0.7, 0.73, 0.87), na.rm = T),
    sum((grossprem_old * (1 - nu) * (1 - best_guess_sumrate)) * as.numeric(best_guess_AV_old == 0.8), na.rm = T) / sum(best_guess_AV_old == 0.8, na.rm = T)))

  Results$subsidy <- with(foo, 
    c(sum((grossprem_old * (1 - ((1 - nu) * (1 - best_guess_sumrate)))) * as.numeric(best_guess_AV_old == 0.6), na.rm = T) / sum (best_guess_AV_old == 0.6, na.rm = T),
    sum((grossprem_old * (1 - ((1 - nu) * (1 - best_guess_sumrate)))) * as.numeric(best_guess_AV_old %in% c(0.7, 0.73, 0.87)), na.rm = T) / sum(best_guess_AV_old %in% c(0.7, 0.73, 0.87), na.rm = T),
    sum((grossprem_old * (1 - ((1 - nu) * (1 - best_guess_sumrate)))) * as.numeric(best_guess_AV_old == 0.8), na.rm = T) / sum(best_guess_AV_old == 0.8, na.rm = T)))

  return(Results)
}
